# -*- coding:utf-8 -*-

# @Time    : 2023/4/22 04:27
# @Author  : zengwenjia
# @Email   : zengwenjia@lingxi.ai
# @File    : handle_common_sense.py
# @Software: LLM_internal

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

import json
from util_tool import utils


def convert_to_instruction(path, target_path):
    """Convert common-sense knowledge-base records into instruction format.

    Reads ``path`` line by line, parsing each non-blank line as one JSON
    object, turns every record into a dict with the keys ``instruction``,
    ``input`` and ``output``, and hands the resulting list to
    ``utils.jdump`` together with ``target_path``.

    Args:
        path: Path of the UTF-8 source file holding one JSON object per line.
        target_path: Destination forwarded unchanged to ``utils.jdump``.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record lacks ``textbox_q_instruction``,
            ``textbox_question`` or ``textbox_answer``.
    """
    instruct_dict_list = []
    # The context manager closes the source file even when parsing fails.
    with open(path, "r", encoding="utf8") as source:
        for line in source:
            # Drop a leading byte-order mark so the JSON parser accepts the line.
            if line.startswith("\ufeff"):
                line = line[1:]
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            if not line.strip():
                continue
            instruct_dict_list.append(_record_to_instruction(json.loads(line)))

    utils.jdump(instruct_dict_list, target_path)


def _record_to_instruction(data):
    """Build one instruction/input/output dict from a single parsed record."""
    instruction = data["textbox_q_instruction"]

    # The context is optional; when present it is wrapped in blank lines and
    # placed in front of the question.
    prompt = data["textbox_question"]
    context = data.get("textbox_q_context")
    if context:
        prompt = "\n\n" + context + "\n\n" + prompt

    # The answer analysis is optional; when present it follows the answer
    # after a blank line.
    answer = data["textbox_answer"]
    analysis = data.get("textbox_answer_analysis")
    if analysis:
        answer = answer + "\n\n" + analysis

    return {"instruction": instruction, "input": prompt, "output": answer}


if __name__ == "__main__":
    # Script entry point: run the conversion with the source path first and
    # the output path second.
    convert_to_instruction(
        "../data_set/common_sense/exam_instructions.json",
        "common_sense/exam_instructions_new.json",
    )

